# Uncomment if packages not installed
## install.packages("psych")
## install.packages("dplyr")
## install.packages("ggpubr")
## install.packages("cowplot")
## install.packages("lavaan")
## install.packages("plotly")
## install.packages("tidySEM")
# Load data
# NOTE(review): a hard-coded absolute setwd() makes the script non-portable;
# prefer running R from the project directory (or an RStudio project) instead.
setwd('D:\\Yaxin\\HKBU BM\\Courses\\Sem 2\\ECON7860 Big Data Analytics for Business (S11)\\Group Project\\HR Analytics\\working')
# read.csv() uses sep = "," AND dec = "." directly. The original
# read.csv2(..., sep = ',') only overrode the separator and kept dec = ",",
# which is why "satisfaction_level" and "last_evaluation" (values like "0.38")
# came in as character and needed a manual as.numeric() later.
rawData <- read.csv("HR_comma_sep.csv")
# Check missing values
# anyNA() is the idiomatic, short-circuiting equivalent of
# TRUE %in% is.na(rawData); it also prints [1] FALSE when data are complete.
anyNA(rawData)
## [1] FALSE
# Have a glance of the data
colnames(rawData)
## [1] "satisfaction_level" "last_evaluation" "number_project"
## [4] "average_montly_hours" "time_spend_company" "Work_accident"
## [7] "left" "promotion_last_5years" "sales"
## [10] "salary"
str(rawData)
## 'data.frame': 14999 obs. of 10 variables:
## $ satisfaction_level : chr "0.38" "0.8" "0.11" "0.72" ...
## $ last_evaluation : chr "0.53" "0.86" "0.88" "0.87" ...
## $ number_project : int 2 5 7 5 2 2 6 5 5 2 ...
## $ average_montly_hours : int 157 262 272 223 159 153 247 259 224 142 ...
## $ time_spend_company : int 3 6 4 5 3 3 4 5 5 3 ...
## $ Work_accident : int 0 0 0 0 0 0 0 0 0 0 ...
## $ left : int 1 1 1 1 1 1 1 1 1 1 ...
## $ promotion_last_5years: int 0 0 0 0 0 0 0 0 0 0 ...
## $ sales : chr "sales" "sales" "sales" "sales" ...
## $ salary : chr "low" "medium" "medium" "low" ...
summary(rawData)
## satisfaction_level last_evaluation number_project average_montly_hours
## Length:14999 Length:14999 Min. :2.000 Min. : 96.0
## Class :character Class :character 1st Qu.:3.000 1st Qu.:156.0
## Mode :character Mode :character Median :4.000 Median :200.0
## Mean :3.803 Mean :201.1
## 3rd Qu.:5.000 3rd Qu.:245.0
## Max. :7.000 Max. :310.0
## time_spend_company Work_accident left promotion_last_5years
## Min. : 2.000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.: 3.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median : 3.000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean : 3.498 Mean :0.1446 Mean :0.2381 Mean :0.02127
## 3rd Qu.: 4.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :10.000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## sales salary
## Length:14999 Length:14999
## Class :character Class :character
## Mode :character Mode :character
##
##
##
table(rawData$Work_accident)
##
## 0 1
## 12830 2169
table(rawData$promotion_last_5years)
##
## 0 1
## 14680 319
table(rawData$sales)
##
## accounting hr IT management marketing product_mng
## 767 739 1227 630 858 902
## RandD sales support technical
## 787 4140 2229 2720
table(rawData$salary)
##
## high low medium
## 1237 7316 6446
table(rawData$left)
##
## 0 1
## 11428 3571
# Transform "satisfaction_level" and "last_evaluation" to numeric
# (they were read in as character -- see the str() output above; as.numeric()
# is a no-op on a column that is already numeric, so this is safe either way)
rawData$satisfaction_level <- as.numeric(rawData$satisfaction_level)
rawData$last_evaluation <- as.numeric(rawData$last_evaluation)
summary(rawData)
## satisfaction_level last_evaluation number_project average_montly_hours
## Min. :0.0900 Min. :0.3600 Min. :2.000 Min. : 96.0
## 1st Qu.:0.4400 1st Qu.:0.5600 1st Qu.:3.000 1st Qu.:156.0
## Median :0.6400 Median :0.7200 Median :4.000 Median :200.0
## Mean :0.6128 Mean :0.7161 Mean :3.803 Mean :201.1
## 3rd Qu.:0.8200 3rd Qu.:0.8700 3rd Qu.:5.000 3rd Qu.:245.0
## Max. :1.0000 Max. :1.0000 Max. :7.000 Max. :310.0
## time_spend_company Work_accident left promotion_last_5years
## Min. : 2.000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.: 3.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median : 3.000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean : 3.498 Mean :0.1446 Mean :0.2381 Mean :0.02127
## 3rd Qu.: 4.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :10.000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## sales salary
## Length:14999 Length:14999
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Separate numeric variables and target variable.
# Select columns by NAME rather than position so this step survives any
# reordering of rawData (e.g. the relocate() call later in this script).
num_vars <- c("satisfaction_level", "last_evaluation", "number_project",
              "average_montly_hours", "time_spend_company", "left")
X <- rawData[ , num_vars]
tag <- colnames(X)
y <- rawData$left
# Check for outliers (of numeric variables) with boxplots
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.0.4
## Loading required package: ggplot2
library("cowplot")
## Warning: package 'cowplot' was built under R version 4.0.4
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
# One boxplot per numeric feature (column 6 of X is the target "left").
# Build the five panels in a loop instead of five copy-pasted calls.
bxps <- lapply(seq_len(5), function(i) {
  ggboxplot(X[, i], xlab = tag[i], ylab = tag[i], fill = '#999999')
})
# Drop the x-axis text on the last panel only, as in the original layout.
bxps[[5]] <- bxps[[5]] + rremove("x.text")
# labels must match the 5 panels; the original passed all 6 tags (incl. "left").
plot_grid(plotlist = bxps, labels = tag[1:5],
          ncol = 3, nrow = 2, label_size = 8)

# Identify outliers for "time_spend_company": values beyond the boxplot
# whiskers (outside 1.5 * IQR). Select the column by name for clarity.
out <- boxplot.stats(X[["time_spend_company"]])$out
table(out)
## out
## 6 7 8 10
## 718 188 162 214
length(out)
## [1] 1282
# Identify the subjects (row indices) corresponding to the "extreme" values
out_ind <- which(X[["time_spend_company"]] %in% out)
# Examine if the outlier dataset is a different population:
# X1 = outlier rows, X2 = all remaining rows
X1 <- X[out_ind, ]
X2 <- X[-out_ind, ]
summary(X1)
## satisfaction_level last_evaluation number_project average_montly_hours
## Min. :0.1200 Min. :0.360 Min. :2.000 Min. : 97.0
## 1st Qu.:0.4700 1st Qu.:0.600 1st Qu.:3.000 1st Qu.:164.0
## Median :0.6650 Median :0.760 Median :4.000 Median :215.0
## Mean :0.6247 Mean :0.735 Mean :4.016 Mean :205.9
## 3rd Qu.:0.8300 3rd Qu.:0.890 3rd Qu.:5.000 3rd Qu.:248.0
## Max. :1.0000 Max. :1.000 Max. :6.000 Max. :293.0
## time_spend_company left
## Min. : 6.000 Min. :0.000
## 1st Qu.: 6.000 1st Qu.:0.000
## Median : 6.000 Median :0.000
## Mean : 7.067 Mean :0.163
## 3rd Qu.: 8.000 3rd Qu.:0.000
## Max. :10.000 Max. :1.000
summary(X2)
## satisfaction_level last_evaluation number_project average_montly_hours
## Min. :0.0900 Min. :0.3600 Min. :2.000 Min. : 96.0
## 1st Qu.:0.4400 1st Qu.:0.5600 1st Qu.:3.000 1st Qu.:156.0
## Median :0.6400 Median :0.7200 Median :4.000 Median :198.0
## Mean :0.6117 Mean :0.7143 Mean :3.783 Mean :200.6
## 3rd Qu.:0.8100 3rd Qu.:0.8700 3rd Qu.:5.000 3rd Qu.:245.0
## Max. :1.0000 Max. :1.0000 Max. :7.000 Max. :310.0
## time_spend_company left
## Min. :2.000 Min. :0.0000
## 1st Qu.:3.000 1st Qu.:0.0000
## Median :3.000 Median :0.0000
## Mean :3.165 Mean :0.2451
## 3rd Qu.:4.000 3rd Qu.:0.0000
## Max. :5.000 Max. :1.0000
## Test if mean(left) is significantly lower for X1 than for X2.
## "left" is 0/1, so each group mean is its attrition rate; this is a
## one-sided Welch two-sample t-test (unequal variances by default).
alpha <- 0.05
t.test(X1$left, X2$left, alternative = 'less')
##
## Welch Two Sample t-test
##
## data: X1$left and X2$left
## t = -7.4918, df = 1623.6, p-value = 5.551e-14
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.06404151
## sample estimates:
## mean of x mean of y
## 0.1630265 0.2450973
## Test if distributions of features are different between X1 and X2
## (two-sample Kolmogorov-Smirnov test, one feature at a time; `ind`
## accumulates one TRUE/FALSE verdict per feature).
# NOTE(review): the comparison uses alpha / 2 = 0.025 while the printed
# message claims the 0.05 level of significance -- confirm which was intended.
# NOTE(review): growing `ind` with rbind() inside six near-identical if/else
# blocks is an R anti-pattern; a vapply() over the feature names would be
# shorter and preallocated.
ind <- c()
### satisfaction_level
if (ks.test(X1$satisfaction_level, X2$satisfaction_level)$p.value < alpha / 2) {
ind <- rbind(ind, TRUE);
print("The distribution of 'satisfaction_level' is statistically different between X1 and X2 at 0.05 level of significance.")
} else {
ind <- rbind(ind, FALSE);
print("The distribution of 'satisfaction_level' is NOT statistically different between X1 and X2 at 0.05 level of significance.")
}
## Warning in ks.test(X1$satisfaction_level, X2$satisfaction_level): p-value will
## be approximate in the presence of ties
## [1] "The distribution of 'satisfaction_level' is statistically different between X1 and X2 at 0.05 level of significance."
### last_evaluation -- same two-sample KS test, verdict appended to `ind`
if (ks.test(X1$last_evaluation, X2$last_evaluation)$p.value < alpha / 2) {
ind <- rbind(ind, TRUE);
print("The distribution of 'last_evaluation' is statistically different between X1 and X2 at 0.05 level of significance.")
} else {
ind <- rbind(ind, FALSE);
print("The distribution of 'last_evaluation' is NOT statistically different between X1 and X2 at 0.05 level of significance.")
}
## Warning in ks.test(X1$last_evaluation, X2$last_evaluation): p-value will be
## approximate in the presence of ties
## [1] "The distribution of 'last_evaluation' is statistically different between X1 and X2 at 0.05 level of significance."
### number_project -- same two-sample KS test, verdict appended to `ind`
if (ks.test(X1$number_project, X2$number_project)$p.value < alpha / 2) {
ind <- rbind(ind, TRUE);
print("The distribution of 'number_project' is statistically different between X1 and X2 at 0.05 level of significance.")
} else {
ind <- rbind(ind, FALSE);
print("The distribution of 'number_project' is NOT statistically different between X1 and X2 at 0.05 level of significance.")
}
## Warning in ks.test(X1$number_project, X2$number_project): p-value will be
## approximate in the presence of ties
## [1] "The distribution of 'number_project' is statistically different between X1 and X2 at 0.05 level of significance."
### average_montly_hours -- same two-sample KS test, verdict appended to `ind`
if (ks.test(X1$average_montly_hours, X2$average_montly_hours)$p.value < alpha / 2) {
ind <- rbind(ind, TRUE);
print("The distribution of 'average_montly_hours' is statistically different between X1 and X2 at 0.05 level of significance.")
} else {
ind <- rbind(ind, FALSE);
print("The distribution of 'average_montly_hours' is NOT statistically different between X1 and X2 at 0.05 level of significance.")
}
## Warning in ks.test(X1$average_montly_hours, X2$average_montly_hours): p-value
## will be approximate in the presence of ties
## [1] "The distribution of 'average_montly_hours' is statistically different between X1 and X2 at 0.05 level of significance."
### time_spend_company -- same two-sample KS test; guaranteed to differ here,
### since X1/X2 were split on this very variable
if (ks.test(X1$time_spend_company, X2$time_spend_company)$p.value < alpha / 2) {
ind <- rbind(ind, TRUE);
print("The distribution of 'time_spend_company' is statistically different between X1 and X2 at 0.05 level of significance.")
} else {
ind <- rbind(ind, FALSE);
print("The distribution of 'time_spend_company' is NOT statistically different between X1 and X2 at 0.05 level of significance.")
}
## Warning in ks.test(X1$time_spend_company, X2$time_spend_company): p-value will
## be approximate in the presence of ties
## [1] "The distribution of 'time_spend_company' is statistically different between X1 and X2 at 0.05 level of significance."
### left
# NOTE(review): a KS test on a binary 0/1 variable reduces to comparing the
# two proportions; a chi-squared or two-proportion test would be the more
# conventional choice here -- confirm KS was intended.
if (ks.test(X1$left, X2$left)$p.value < alpha / 2) {
ind <- rbind(ind, TRUE);
print("The distribution of 'left' is statistically different between X1 and X2 at 0.05 level of significance.")
} else {
ind <- rbind(ind, FALSE);
print("The distribution of 'left' is NOT statistically different between X1 and X2 at 0.05 level of significance.")
}
## Warning in ks.test(X1$left, X2$left): p-value will be approximate in the
## presence of ties
## [1] "The distribution of 'left' is statistically different between X1 and X2 at 0.05 level of significance."
ind
## [,1]
## [1,] TRUE
## [2,] TRUE
## [3,] TRUE
## [4,] TRUE
## [5,] TRUE
## [6,] TRUE
#### ks.test(X1$satisfaction_level, X2$satisfaction_level)
#### ks.test(X1$last_evaluation, X2$last_evaluation)
#### ks.test(X1$number_project, X2$number_project)
#### ks.test(X1$average_montly_hours, X2$average_montly_hours)
#### ks.test(X1$time_spend_company, X2$time_spend_company)
#### ks.test(X1$left, X2$left)
## Test if distributions of features are different between X1 and X
### ks.test(X1$satisfaction_level, X$satisfaction_level)
### ks.test(X1$last_evaluation, X$last_evaluation)
### ks.test(X1$number_project, X$number_project)
### ks.test(X1$average_montly_hours, X$average_montly_hours)
### ks.test(X1$time_spend_company, X$time_spend_company)
### ks.test(X1$left, X$left)
### All tests are significant at alpha = 0.05
## Test if distributions of features are different between X2 and X
### ks.test(X2$satisfaction_level, X$satisfaction_level)
### ks.test(X2$last_evaluation, X$last_evaluation)
### ks.test(X2$number_project, X$number_project)
### ks.test(X2$average_montly_hours, X$average_montly_hours)
### ks.test(X2$time_spend_company, X$time_spend_company)
### ks.test(X2$left, X$left)
### All tests are NOT significant at alpha = 0.05 except for "time_spend_company"
# Plot the scatter plot matrices for all variables
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.4
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
## Warning: package 'psych' was built under R version 4.0.4
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## Move target variable "left" to the end of the data frame so it appears
## last in the scatter-plot matrices below
rawData <- rawData %>% relocate(left, .after = last_col())
# Scatter-plot matrix of all variables; pch = '.' keeps 14999 points legible
pairs.panels(rawData, pch = '.')

pairs.panels(rawData[out_ind, ])

pairs.panels(rawData[-out_ind, ], pch = '.')

pairs.panels(rawData, pch = '.', ellipses = FALSE)

## pairs.panels(rawData)
# Exploratory factor (principal component, and cluster) analysis
## Standardization of numerical data: scale() z-scores each feature column
## (column 6 of X is the target "left" and is excluded); the binary target is
## then appended unscaled -- cbind names that appended column "y".
## NOTE(review): X1 and X2 are standardized within-subset, so their scores are
## on different scales than X_norm -- confirm this is intended.
X_norm <- cbind(scale(X[ , -c(6)]), y)
X1_norm <- cbind(scale(X1[ , -c(6)]), y[out_ind])
X2_norm <- cbind(scale(X2[ , -c(6)]), y[-out_ind])
## On all observations
# Parallel analysis (observed scree vs. simulated/resampled data) to choose
# the number of factors / components; the last column ("y") is excluded
fa.parallel(X_norm[ , -c(6)])

## Parallel analysis suggests that the number of factors = 2 and the number of components = 2
# 1-factor EFA (psych::fa, default minres extraction) on the five features
fa1 <- fa(X_norm[ , -c(6)], 1)
fa1
## Factor Analysis using method = minres
## Call: fa(r = X_norm[, -c(6)], nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## satisfaction_level -0.07 0.0051 0.99 1
## last_evaluation 0.51 0.2626 0.74 1
## number_project 0.70 0.4938 0.51 1
## average_montly_hours 0.61 0.3673 0.63 1
## time_spend_company 0.26 0.0662 0.93 1
##
## MR1
## SS loadings 1.19
## Proportion Var 0.24
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 10 and the objective function was 0.48 with Chi Square of 7153.12
## The degrees of freedom for the model are 5 and the objective function was 0.06
##
## The root mean square of the residuals (RMSR) is 0.06
## The df corrected root mean square of the residuals is 0.09
##
## The harmonic number of observations is 14999 with the empirical chi square 1142.95 with prob < 6.7e-245
## The total number of observations was 14999 with Likelihood Chi Square = 839.32 with prob < 3.6e-179
##
## Tucker Lewis Index of factoring reliability = 0.766
## RMSEA index = 0.105 and the 90 % confidence intervals are 0.1 0.112
## BIC = 791.24
## Fit based upon off diagonal values = 0.93
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.82
## Multiple R square of scores with factors 0.67
## Minimum correlation of possible factor scores 0.33
fa.diagram(fa1)

fa2 <- fa(X_norm[ , -c(6)], 2)
## Loading required namespace: GPArotation
fa2
## Factor Analysis using method = minres
## Call: fa(r = X_norm[, -c(6)], nfactors = 2)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 h2 u2 com
## satisfaction_level -0.01 0.71 0.502 0.50 1.0
## last_evaluation 0.57 0.20 0.346 0.65 1.2
## number_project 0.69 -0.14 0.504 0.50 1.1
## average_montly_hours 0.60 0.03 0.359 0.64 1.0
## time_spend_company 0.24 -0.12 0.077 0.92 1.4
##
## MR1 MR2
## SS loadings 1.21 0.57
## Proportion Var 0.24 0.11
## Cumulative Var 0.24 0.36
## Proportion Explained 0.68 0.32
## Cumulative Proportion 0.68 1.00
##
## With factor correlations of
## MR1 MR2
## MR1 1.00 -0.08
## MR2 -0.08 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 10 and the objective function was 0.48 with Chi Square of 7153.12
## The degrees of freedom for the model are 1 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic number of observations is 14999 with the empirical chi square 22.37 with prob < 2.3e-06
## The total number of observations was 14999 with Likelihood Chi Square = 21.01 with prob < 4.6e-06
##
## Tucker Lewis Index of factoring reliability = 0.972
## RMSEA index = 0.037 and the 90 % confidence intervals are 0.024 0.051
## BIC = 11.39
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1 MR2
## Correlation of (regression) scores with factors 0.82 0.73
## Multiple R square of scores with factors 0.67 0.53
## Minimum correlation of possible factor scores 0.35 0.06
fa.diagram(fa2)

pca1 <- pca(X_norm[ , -c(6)], 1)
pca1
## Principal Components Analysis
## Call: principal(r = r, nfactors = nfactors, residuals = residuals,
## rotate = rotate, n.obs = n.obs, covar = covar, scores = scores,
## missing = missing, impute = impute, oblique.scores = oblique.scores,
## method = method, use = use, cor = cor, correct = 0.5, weight = NULL)
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 h2 u2 com
## satisfaction_level -0.12 0.014 0.99 1
## last_evaluation 0.69 0.471 0.53 1
## number_project 0.78 0.614 0.39 1
## average_montly_hours 0.74 0.552 0.45 1
## time_spend_company 0.42 0.179 0.82 1
##
## PC1
## SS loadings 1.83
## Proportion Var 0.37
##
## Mean item complexity = 1
## Test of the hypothesis that 1 component is sufficient.
##
## The root mean square of the residuals (RMSR) is 0.15
## with the empirical chi square 6422.41 with prob < 0
##
## Fit based upon off diagonal values = 0.59
fa.diagram(pca1)

pca2 <- pca(X_norm[ , -c(6)], 2)
pca2
## Principal Components Analysis
## Call: principal(r = r, nfactors = nfactors, residuals = residuals,
## rotate = rotate, n.obs = n.obs, covar = covar, scores = scores,
## missing = missing, impute = impute, oblique.scores = oblique.scores,
## method = method, use = use, cor = cor, correct = 0.5, weight = NULL)
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 h2 u2 com
## satisfaction_level 0.15 0.87 0.78 0.22 1.1
## last_evaluation 0.77 0.16 0.62 0.38 1.1
## number_project 0.71 -0.35 0.63 0.37 1.5
## average_montly_hours 0.75 -0.10 0.57 0.43 1.0
## time_spend_company 0.28 -0.51 0.34 0.66 1.6
##
## RC1 RC2
## SS loadings 1.76 1.17
## Proportion Var 0.35 0.23
## Cumulative Var 0.35 0.59
## Proportion Explained 0.60 0.40
## Cumulative Proportion 0.60 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.16
## with the empirical chi square 7832.32 with prob < 0
##
## Fit based upon off diagonal values = 0.5
fa.diagram(pca2)

### Existence of hierarchical latent factor
om <- omega(X_norm[ , -c(6)], 2, sl = FALSE)
##
## Three factors are required for identification -- general factor loadings set to be equal.
## Proceed with caution.
## Think about redoing the analysis with alternative values of the 'option' setting.

om
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.51
## G.6: 0.5
## Omega Hierarchical: 0.08
## Omega H asymptotic: 0.12
## Omega Total 0.62
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* h2 u2 p2
## satisfaction_level- 0.20 -0.68 0.50 0.50 0.08
## last_evaluation 0.55 0.35 0.65 0.03
## number_project 0.23 0.66 0.50 0.50 0.11
## average_montly_hours 0.58 0.36 0.64 0.07
## time_spend_company 0.23 0.08 0.92 0.13
##
## With eigenvalues of:
## g F1* F2*
## 0.14 1.12 0.53
##
## general/max 0.13 max/min = 2.11
## mean percent general = 0.08 with sd = 0.04 and cv of 0.46
## Explained Common Variance of the general factor = 0.08
##
## The degrees of freedom are 1 and the fit is 0
## The number of observations was 14999 with Chi Square = 21.01 with prob < 4.6e-06
## The root mean square of the residuals is 0.01
## The df corrected root mean square of the residuals is 0.03
## RMSEA index = 0.037 and the 10 % confidence intervals are 0.024 0.051
## BIC = 11.39
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 5 and the fit is 0.4
## The number of observations was 14999 with Chi Square = 5994.02 with prob < 0
## The root mean square of the residuals is 0.21
## The df corrected root mean square of the residuals is 0.3
##
## RMSEA index = 0.283 and the 10 % confidence intervals are 0.277 0.289
## BIC = 5945.94
##
## Measures of factor score adequacy
## g F1* F2*
## Correlation of scores with factors 0.30 0.79 0.70
## Multiple R square of scores with factors 0.09 0.62 0.49
## Minimum correlation of factor score estimates -0.82 0.24 -0.03
##
## Total, General and Subset omega for each subset
## g F1* F2*
## Omega total for total scores and subscales 0.62 0.62 0.50
## Omega general for total scores and subscales 0.08 0.05 0.04
## Omega group for total scores and subscales 0.53 0.57 0.46
### Item cluster analysis
ic <- iclust(X_norm[ , -c(6)])

ic
## ICLUST (Item Cluster Analysis)
## Call: iclust(r.mat = X_norm[, -c(6)])
##
## Purified Alpha:
## [1] 0.51
##
## G6* reliability:
## [1] 0.76
##
## Original Beta:
## [1] 0.25
##
## Cluster size:
## [1] 5
##
## Item by Cluster Structure matrix:
## [,1]
## satisfaction_level -0.10
## last_evaluation 0.44
## number_project 0.67
## average_montly_hours 0.55
## time_spend_company 0.30
##
## With eigenvalues of:
## [1] 1
##
## Purified scale intercorrelations
## reliabilities on diagonal
## correlations corrected for attenuation above diagonal:
## [,1]
## [1,] 0.51
##
## Cluster fit = 0.44 Pattern fit = 0.98 RMSR = 0.07
### Visualization
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.4
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#### 2-dim PCA
# 3-D scatter of the two rotated component scores (RC1, RC2) against the
# target; points colored by "left" (gray50 = stayed, red = left)
data_PCA <- as.data.frame(cbind(pca2$scores, y))
fig_pca <- plot_ly(data_PCA, x = ~RC1, y = ~RC2, z = ~y, type = 'scatter3d',
size = I(5), color = y, colors = c('gray50', 'red'))
fig_pca <- fig_pca %>% layout(
title = 'Visualization with Numeric Variables and 2 PCs',
scene = list(
xaxis = list(title = "PC1"),
yaxis = list(title = "PC2"),
zaxis = list(title = "left")
))
fig_pca
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
#### 2-dim FA
# Same 3-D visualization as above, but using the two factor scores (MR1, MR2)
# from the 2-factor EFA instead of the PCA component scores
data_FA <- as.data.frame(cbind(fa2$scores, y))
fig_fa <- plot_ly(data_FA, x = ~MR1, y = ~MR2, z = ~y, type = 'scatter3d',
size = I(5), color = y, colors = c('gray50', 'red'))
fig_fa <- fig_fa %>% layout(
title = 'Visualization with Numeric Variables and 2 Factors',
scene = list(
xaxis = list(title = "Factor 1"),
yaxis = list(title = "Factor 2"),
zaxis = list(title = "left")
))
fig_fa
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
## On X1
fa.parallel(X1_norm[ , -c(6)])

## Parallel analysis suggests that the number of factors = 0 and the number of components = 2
pca1_X1 <- pca(X1_norm[ , -c(6)], 1)
pca1_X1
## Principal Components Analysis
## Call: principal(r = r, nfactors = nfactors, residuals = residuals,
## rotate = rotate, n.obs = n.obs, covar = covar, scores = scores,
## missing = missing, impute = impute, oblique.scores = oblique.scores,
## method = method, use = use, cor = cor, correct = 0.5, weight = NULL)
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 h2 u2 com
## satisfaction_level 0.08 0.0071 0.99 1
## last_evaluation 0.65 0.4229 0.58 1
## number_project 0.60 0.3603 0.64 1
## average_montly_hours 0.68 0.4586 0.54 1
## time_spend_company -0.47 0.2237 0.78 1
##
## PC1
## SS loadings 1.47
## Proportion Var 0.29
##
## Mean item complexity = 1
## Test of the hypothesis that 1 component is sufficient.
##
## The root mean square of the residuals (RMSR) is 0.18
## with the empirical chi square 806.46 with prob < 4.6e-172
##
## Fit based upon off diagonal values = -0.49
fa.diagram(pca1_X1)

pca2_X1 <- pca(X1_norm[ , -c(6)], 2)
pca2_X1
## Principal Components Analysis
## Call: principal(r = r, nfactors = nfactors, residuals = residuals,
## rotate = rotate, n.obs = n.obs, covar = covar, scores = scores,
## missing = missing, impute = impute, oblique.scores = oblique.scores,
## method = method, use = use, cor = cor, correct = 0.5, weight = NULL)
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 h2 u2 com
## satisfaction_level 0.53 0.58 0.61 0.39 2.0
## last_evaluation 0.73 -0.10 0.54 0.46 1.0
## number_project 0.24 -0.69 0.53 0.47 1.2
## average_montly_hours 0.71 -0.18 0.53 0.47 1.1
## time_spend_company -0.10 0.66 0.45 0.55 1.0
##
## RC1 RC2
## SS loadings 1.38 1.29
## Proportion Var 0.28 0.26
## Cumulative Var 0.28 0.53
## Proportion Explained 0.52 0.48
## Cumulative Proportion 0.52 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.19
## with the empirical chi square 919.02 with prob < 7.2e-202
##
## Fit based upon off diagonal values = -0.7
fa.diagram(pca2_X1)

### Existence of hierarchical latent factor
om_X1 <- omega(X1_norm[ , -c(6)], 2, sl = FALSE)
##
## Three factors are required for identification -- general factor loadings set to be equal.
## Proceed with caution.
## Think about redoing the analysis with alternative values of the 'option' setting.

om_X1
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.33
## G.6: 0.32
## Omega Hierarchical: 0.13
## Omega H asymptotic: 0.3
## Omega Total 0.43
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* h2 u2 p2
## satisfaction_level- -0.30 0.33 0.20 0.80 0.00
## last_evaluation 0.23 0.46 0.27 0.73 0.20
## number_project 0.27 0.44 0.28 0.72 0.26
## average_montly_hours 0.24 0.43 0.25 0.75 0.23
## time_spend_company- 0.31 0.13 0.87 0.23
##
## With eigenvalues of:
## g F1* F2*
## 0.22 0.51 0.41
##
## general/max 0.42 max/min = 1.26
## mean percent general = 0.18 with sd = 0.11 and cv of 0.57
## Explained Common Variance of the general factor = 0.19
##
## The degrees of freedom are 1 and the fit is 0
## The number of observations was 1282 with Chi Square = 2.17 with prob < 0.14
## The root mean square of the residuals is 0.01
## The df corrected root mean square of the residuals is 0.03
## RMSEA index = 0.03 and the 10 % confidence intervals are 0 0.087
## BIC = -4.99
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 5 and the fit is 0.12
## The number of observations was 1282 with Chi Square = 155.42 with prob < 9.4e-32
## The root mean square of the residuals is 0.11
## The df corrected root mean square of the residuals is 0.16
##
## RMSEA index = 0.153 and the 10 % confidence intervals are 0.133 0.174
## BIC = 119.64
##
## Measures of factor score adequacy
## g F1* F2*
## Correlation of scores with factors 0.38 0.61 0.57
## Multiple R square of scores with factors 0.15 0.37 0.32
## Minimum correlation of factor score estimates -0.70 -0.26 -0.36
##
## Total, General and Subset omega for each subset
## g F1* F2*
## Omega total for total scores and subscales 0.43 0.41 0.37
## Omega general for total scores and subscales 0.13 0.09 0.06
## Omega group for total scores and subscales 0.29 0.32 0.31
### Item cluster analysis
ic_X1 <- iclust(X1_norm[ , -c(6)])

ic_X1
## ICLUST (Item Cluster Analysis)
## Call: iclust(r.mat = X1_norm[, -c(6)])
##
## Purified Alpha:
## [1] 0.33
##
## G6* reliability:
## [1] 0.12
##
## Original Beta:
## [1] 0.16
##
## Cluster size:
## [1] 5
##
## Item by Cluster Structure matrix:
## [,1]
## satisfaction_level 0.01
## last_evaluation -0.30
## number_project -0.47
## average_montly_hours -0.35
## time_spend_company 0.34
##
## With eigenvalues of:
## [1] 0.55
##
## Purified scale intercorrelations
## reliabilities on diagonal
## correlations corrected for attenuation above diagonal:
## [,1]
## [1,] 0.33
##
## Cluster fit = 0.24 Pattern fit = 0.97 RMSR = 0.08
### Visualization
#### 2-dim PCA
# Outlier subset (X1) only: 3-D scatter of its two rotated component scores
# against the target, colored by "left" (larger markers, fewer points)
y1 <- y[out_ind]
data_PCA_X1 <- as.data.frame(cbind(pca2_X1$scores, y1))
fig_pca_X1 <- plot_ly(data_PCA_X1, x = ~RC1, y = ~RC2, z = ~y1,
type = 'scatter3d', size = I(8),
color = y1, colors = c('gray50', 'red'))
fig_pca_X1 <- fig_pca_X1 %>% layout(
title = 'Visualization with Numeric Variables and 2 PCs (X1)',
scene = list(
xaxis = list(title = "PC1"),
yaxis = list(title = "PC2"),
zaxis = list(title = "left")
))
fig_pca_X1
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
## On X2
# Parallel analysis (scree vs. resampled data) to choose the number of
# factors/components for the X2 subset; column 6 is excluded as above.
fa.parallel(X2_norm[ , -c(6)])

## Parallel analysis suggests that the number of factors = 2 and the number of components = 2
# One-factor EFA (minres) as a baseline, despite parallel analysis
# suggesting two factors — compared against fa2_X2 below.
fa1_X2 <- fa(X2_norm[ , -c(6)], 1)
fa1_X2
## Factor Analysis using method = minres
## Call: fa(r = X2_norm[, -c(6)], nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## satisfaction_level -0.12 0.015 0.99 1
## last_evaluation 0.50 0.253 0.75 1
## number_project 0.74 0.554 0.45 1
## average_montly_hours 0.60 0.357 0.64 1
## time_spend_company 0.40 0.158 0.84 1
##
## MR1
## SS loadings 1.34
## Proportion Var 0.27
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 10 and the objective function was 0.61 with Chi Square of 8311.15
## The degrees of freedom for the model are 5 and the objective function was 0.09
##
## The root mean square of the residuals (RMSR) is 0.08
## The df corrected root mean square of the residuals is 0.11
##
## The harmonic number of observations is 13717 with the empirical chi square 1687.77 with prob < 0
## The total number of observations was 13717 with Likelihood Chi Square = 1169.63 with prob < 1.1e-250
##
## Tucker Lewis Index of factoring reliability = 0.719
## RMSEA index = 0.13 and the 90 % confidence intervals are 0.124 0.137
## BIC = 1122
## Fit based upon off diagonal values = 0.91
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.84
## Multiple R square of scores with factors 0.70
## Minimum correlation of possible factor scores 0.40
# Path diagram of the one-factor solution.
fa.diagram(fa1_X2)

# Two-factor EFA, the solution recommended by parallel analysis; the
# transcript below shows a much better fit (RMSEA 0.054 vs 0.13).
fa2_X2 <- fa(X2_norm[ , -c(6)], 2)
fa2_X2
## Factor Analysis using method = minres
## Call: fa(r = X2_norm[, -c(6)], nfactors = 2)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 h2 u2 com
## satisfaction_level 0.00 0.93 0.86 0.14 1.0
## last_evaluation 0.57 0.17 0.33 0.67 1.2
## number_project 0.70 -0.09 0.52 0.48 1.0
## average_montly_hours 0.61 0.03 0.37 0.63 1.0
## time_spend_company 0.37 -0.19 0.18 0.82 1.5
##
## MR1 MR2
## SS loadings 1.33 0.93
## Proportion Var 0.27 0.19
## Cumulative Var 0.27 0.45
## Proportion Explained 0.59 0.41
## Cumulative Proportion 0.59 1.00
##
## With factor correlations of
## MR1 MR2
## MR1 1.0 -0.1
## MR2 -0.1 1.0
##
## Mean item complexity = 1.1
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 10 and the objective function was 0.61 with Chi Square of 8311.15
## The degrees of freedom for the model are 1 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.04
##
## The harmonic number of observations is 13717 with the empirical chi square 35.33 with prob < 2.8e-09
## The total number of observations was 13717 with Likelihood Chi Square = 41.16 with prob < 1.4e-10
##
## Tucker Lewis Index of factoring reliability = 0.952
## RMSEA index = 0.054 and the 90 % confidence intervals are 0.041 0.069
## BIC = 31.63
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1 MR2
## Correlation of (regression) scores with factors 0.83 0.93
## Multiple R square of scores with factors 0.70 0.86
## Minimum correlation of possible factor scores 0.39 0.72
# Path diagram of the two-factor solution.
fa.diagram(fa2_X2)

# One-component PCA on the same items, for comparison with the EFA results.
pca1_X2 <- pca(X2_norm[ , -c(6)], 1)
pca1_X2
## Principal Components Analysis
## Call: principal(r = r, nfactors = nfactors, residuals = residuals,
## rotate = rotate, n.obs = n.obs, covar = covar, scores = scores,
## missing = missing, impute = impute, oblique.scores = oblique.scores,
## method = method, use = use, cor = cor, correct = 0.5, weight = NULL)
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 h2 u2 com
## satisfaction_level -0.20 0.04 0.96 1
## last_evaluation 0.66 0.43 0.57 1
## number_project 0.79 0.63 0.37 1
## average_montly_hours 0.73 0.53 0.47 1
## time_spend_company 0.58 0.33 0.67 1
##
## PC1
## SS loadings 1.96
## Proportion Var 0.39
##
## Mean item complexity = 1
## Test of the hypothesis that 1 component is sufficient.
##
## The root mean square of the residuals (RMSR) is 0.16
## with the empirical chi square 6683.92 with prob < 0
##
## Fit based upon off diagonal values = 0.65
# Diagram of the one-component loadings.
fa.diagram(pca1_X2)

# Two-component PCA; its rotated scores (RC1, RC2) are reused later for the
# 3-D visualization of the X2 subset.
pca2_X2 <- pca(X2_norm[ , -c(6)], 2)
pca2_X2
## Principal Components Analysis
## Call: principal(r = r, nfactors = nfactors, residuals = residuals,
## rotate = rotate, n.obs = n.obs, covar = covar, scores = scores,
## missing = missing, impute = impute, oblique.scores = oblique.scores,
## method = method, use = use, cor = cor, correct = 0.5, weight = NULL)
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 h2 u2 com
## satisfaction_level 0.14 0.88 0.79 0.21 1.1
## last_evaluation 0.78 0.16 0.63 0.37 1.1
## number_project 0.72 -0.32 0.63 0.37 1.4
## average_montly_hours 0.75 -0.09 0.57 0.43 1.0
## time_spend_company 0.38 -0.60 0.51 0.49 1.7
##
## RC1 RC2
## SS loadings 1.85 1.27
## Proportion Var 0.37 0.25
## Cumulative Var 0.37 0.62
## Proportion Explained 0.59 0.41
## Cumulative Proportion 0.59 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.15
## with the empirical chi square 6248.54 with prob < 0
##
## Fit based upon off diagonal values = 0.67
# Diagram of the two-component loadings.
fa.diagram(pca2_X2)

### Existence of hierarchical latent factor
# Omega analysis with 2 group factors; sl = FALSE requests the hierarchical
# (higher-order) model rather than the Schmid-Leiman bifactor solution.
# With only 2 group factors the model is under-identified, hence the
# warning captured below.
om_X2 <- omega(X2_norm[ , -c(6)], 2, sl = FALSE)
##
## Three factors are required for identification -- general factor loadings set to be equal.
## Proceed with caution.
## Think about redoing the analysis with alternative values of the 'option' setting.

# The low Omega Hierarchical (0.12) in the transcript suggests the general
# factor explains little common variance in this subset.
om_X2
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.58
## G.6: 0.56
## Omega Hierarchical: 0.12
## Omega H asymptotic: 0.16
## Omega Total 0.7
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* h2 u2 p2
## satisfaction_level- 0.30 -0.88 0.86 0.14 0.10
## last_evaluation 0.54 0.33 0.67 0.05
## number_project 0.25 0.67 0.52 0.48 0.12
## average_montly_hours 0.58 0.37 0.63 0.09
## time_spend_company 0.35 0.18 0.82 0.17
##
## With eigenvalues of:
## g F1* F2*
## 0.23 1.19 0.84
##
## general/max 0.19 max/min = 1.43
## mean percent general = 0.11 with sd = 0.04 and cv of 0.41
## Explained Common Variance of the general factor = 0.1
##
## The degrees of freedom are 1 and the fit is 0
## The number of observations was 13717 with Chi Square = 41.16 with prob < 1.4e-10
## The root mean square of the residuals is 0.01
## The df corrected root mean square of the residuals is 0.04
## RMSEA index = 0.054 and the 10 % confidence intervals are 0.041 0.069
## BIC = 31.63
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 5 and the fit is 0.47
## The number of observations was 13717 with Chi Square = 6504.72 with prob < 0
## The root mean square of the residuals is 0.23
## The df corrected root mean square of the residuals is 0.33
##
## RMSEA index = 0.308 and the 10 % confidence intervals are 0.302 0.314
## BIC = 6457.08
##
## Measures of factor score adequacy
## g F1* F2*
## Correlation of scores with factors 0.38 0.79 0.88
## Multiple R square of scores with factors 0.15 0.63 0.78
## Minimum correlation of factor score estimates -0.71 0.25 0.55
##
## Total, General and Subset omega for each subset
## g F1* F2*
## Omega total for total scores and subscales 0.70 0.66 0.86
## Omega general for total scores and subscales 0.12 0.07 0.09
## Omega group for total scores and subscales 0.57 0.59 0.77
### Item cluster analysis
# ICLUST on the X2 subset, mirroring the earlier ic_X1 analysis; yields a
# single 5-item cluster with somewhat higher reliability (alpha = 0.58).
ic_X2 <- iclust(X2_norm[ , -c(6)])

# Print the cluster solution (transcript follows).
ic_X2
## ICLUST (Item Cluster Analysis)
## Call: iclust(r.mat = X2_norm[, -c(6)])
##
## Purified Alpha:
## [1] 0.58
##
## G6* reliability:
## [1] 0.78
##
## Original Beta:
## [1] 0.35
##
## Cluster size:
## [1] 5
##
## Item by Cluster Structure matrix:
## [,1]
## satisfaction_level -0.16
## last_evaluation 0.44
## number_project 0.67
## average_montly_hours 0.55
## time_spend_company 0.46
##
## With eigenvalues of:
## [1] 1.2
##
## Purified scale intercorrelations
## reliabilities on diagonal
## correlations corrected for attenuation above diagonal:
## [,1]
## [1,] 0.58
##
## Cluster fit = 0.51 Pattern fit = 0.98 RMSR = 0.09
### Visualization
#### 2-dim PCA
# Outcome values for the X2 subset: the rows NOT in out_ind (complement of
# the y1 selection used for X1).
y2 <- y[-out_ind]
data_PCA_X2 <- as.data.frame(cbind(pca2_X2$scores, y2))
# 3-D scatter of the two rotated components against the outcome.
# mode = 'markers' is stated explicitly to silence plotly's
# "No scatter3d mode specifed" warning (it previously defaulted to markers).
fig_pca_X2 <- plot_ly(data_PCA_X2, x = ~RC1, y = ~RC2, z = ~y2,
                      type = 'scatter3d', mode = 'markers',
                      size = I(5), color = y2, colors = c('gray50', 'red'))
fig_pca_X2 <- fig_pca_X2 %>% layout(
  title = 'Visualization with Numeric Variables and 2 PCs (X2)',
  scene = list(
    xaxis = list(title = "PC1"),
    yaxis = list(title = "PC2"),
    zaxis = list(title = "left")
  ))
fig_pca_X2
#### 2-dim FA
# Same visualization as above but on the two EFA factor scores (MR1, MR2)
# instead of the rotated principal components.
data_FA_X2 <- as.data.frame(cbind(fa2_X2$scores, y2))
# mode = 'markers' is stated explicitly to silence plotly's
# "No scatter3d mode specifed" warning (it previously defaulted to markers).
fig_fa_X2 <- plot_ly(data_FA_X2, x = ~MR1, y = ~MR2, z = ~y2,
                     type = 'scatter3d', mode = 'markers',
                     size = I(5), color = y2, colors = c('gray50', 'red'))
fig_fa_X2 <- fig_fa_X2 %>% layout(
  title = 'Visualization with Numeric Variables and 2 Factors (X2)',
  scene = list(
    xaxis = list(title = "Factor 1"),
    yaxis = list(title = "Factor 2"),
    zaxis = list(title = "left")
  ))
fig_fa_X2
# Confirmatory factor analysis
library(lavaan)
## Warning: package 'lavaan' was built under R version 4.0.4
## This is lavaan 0.6-8
## lavaan is FREE software! Please report any bugs.
##
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
##
## cor2cov
## One-factor model
# One latent factor measured by the three workload indicators.
m1 <- 'f1 =~ number_project + average_montly_hours + last_evaluation'
# Fit on the full normalized data (column 6 again excluded). With 3
# indicators and 1 factor the model is just-identified (df = 0), so the
# perfect fit indices in the transcript are by construction.
CFA1 <- cfa(m1, X_norm[ , -c(6)])
summary(CFA1, fit.measures=TRUE,standardized=TRUE)
## lavaan 0.6-8 ended normally after 21 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 6
##
## Number of observations 14999
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Model Test Baseline Model:
##
## Test statistic 5619.829
## Degrees of freedom 3
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 1.000
## Tucker-Lewis Index (TLI) 1.000
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -61036.562
## Loglikelihood unrestricted model (H1) -61036.562
##
## Akaike (AIC) 122085.125
## Bayesian (BIC) 122130.819
## Sample-size adjusted Bayesian (BIC) 122111.752
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.000
## P-value RMSEA <= 0.05 NA
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.000
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## number_project 1.000 0.655 0.655
## avrg_mntly_hrs 0.973 0.025 39.067 0.000 0.637 0.637
## last_evaluatin 0.814 0.020 39.777 0.000 0.533 0.533
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .number_project 0.571 0.012 46.062 0.000 0.571 0.571
## .avrg_mntly_hrs 0.594 0.012 49.236 0.000 0.594 0.594
## .last_evaluatin 0.715 0.011 66.229 0.000 0.715 0.716
## f1 0.429 0.014 30.329 0.000 1.000 1.000
## Two-factor model
# Two-factor model: the workload factor plus a single-indicator factor for
# tenure (f2's loading is fixed and its residual forced to 0, as the
# transcript's Variances section shows).
m2 <- 'f1 =~ number_project + average_montly_hours + last_evaluation
f2 =~ time_spend_company'
CFA2 <- cfa(m2, X_norm[ , -c(6)])
summary(CFA2,fit.measures=TRUE,standardized=TRUE)
## lavaan 0.6-8 ended normally after 22 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 8
##
## Number of observations 14999
##
## Model Test User Model:
##
## Test statistic 48.919
## Degrees of freedom 2
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 6302.782
## Degrees of freedom 6
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.993
## Tucker-Lewis Index (TLI) 0.978
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -82001.705
## Loglikelihood unrestricted model (H1) -81977.245
##
## Akaike (AIC) 164019.409
## Bayesian (BIC) 164080.335
## Sample-size adjusted Bayesian (BIC) 164054.912
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.040
## 90 Percent confidence interval - lower 0.030
## 90 Percent confidence interval - upper 0.050
## P-value RMSEA <= 0.05 0.958
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.013
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## number_project 1.000 0.675 0.675
## avrg_mntly_hrs 0.917 0.022 41.080 0.000 0.619 0.619
## last_evaluatin 0.786 0.019 40.614 0.000 0.530 0.530
## f2 =~
## tim_spnd_cmpny 1.000 1.000 1.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 ~~
## f2 0.171 0.007 23.575 0.000 0.254 0.254
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .number_project 0.544 0.012 44.679 0.000 0.544 0.544
## .avrg_mntly_hrs 0.617 0.011 54.339 0.000 0.617 0.617
## .last_evaluatin 0.719 0.011 67.455 0.000 0.719 0.719
## .tim_spnd_cmpny 0.000 0.000 0.000
## f1 0.456 0.014 32.005 0.000 1.000 1.000
## f2 1.000 0.012 86.600 0.000 1.000 1.000
## Visualization of CFA
library(tidySEM)
## Warning: package 'tidySEM' was built under R version 4.0.4
## Registered S3 methods overwritten by 'tidySEM':
## method from
## print.mplus.model MplusAutomation
## print.mplusObject MplusAutomation
## summary.mplus.model MplusAutomation
##
## Attaching package: 'tidySEM'
## The following object is masked from 'package:plotly':
##
## add_paths
# Path diagrams of the one- and two-factor CFA models.
graph_sem(CFA1)

graph_sem(CFA2)

save.image("D:/Yaxin/HKBU BM/Courses/Sem 2/ECON7860 Big Data Analytics for Business (S11)/Group Project/HR Analytics/working/EDA.RData")